*************************************************************
* Name: Peter Kuhn, Trevor Osaki, Lei Yue                 	*
* Purpose: Organize and Tabulate data from MTurk Experiment *
* Date: October 2024                                        *
*************************************************************

*** Set proper directory and obtain raw Qualtrics data set ***
* Start from an empty dataset in memory.
clear

* Root folder of the replication package. Edit this path to match your own machine before running.
* NOTE(review): the path is wrapped in doubled double quotes and is later used unquoted ("cd $homedir").
* Confirm this resolves as intended on your Stata version, especially if the path contains spaces.
global homedir ""/Users/leiyue/Desktop/Projects/WhenIsDicrimUnfair/Replication_Final/""  

* Move to the root folder, then into the dataset-construction subfolder that holds the input files.
cd $homedir
cd "1_Dataset Construction"

* Read the raw survey export (CSV) into memory.
insheet using aa_when_is_discrimination_unfair_raw_data.csv


*** Drop all unnecessary variables ***
* A single drop command, broken across lines (///) and grouped by variable family for readability.
drop ///
    startdate enddate status progress finished responseid recordeddate ///
    recipientlastname recipientfirstname recipientemail externalreference ///
    distributionchannel userlanguage ///
    triangle hand bird dog ///
    t_firstclick t_lastclick t_pagesubmit t_clickcount ///
    v28 v29 v30 v31 v33 v34 v35 v36 v38 v39 v40 v41 ///
    v47 v48 v49 v50 v52 v53 v54 v55 ///
    v61 v62 v63 v64 v66 v67 v68 v69 ///
    v75 v76 v77 v78 v80 v81 v82 v83 v85 v86 v87 v88 v90 v91 v92 v93 ///
    v99 v100 v101 v102 v104 v105 v106 v107 v109 v110 v111 v112 v114 v115 v116 v117 ///
    v140 v141 v142 v143 ///
    fl_676_do fl_167_do fl_182_do fl_270_do fl_282_do fl_331_do fl_223_do ///
    fl_358_do fl_313_do fl_340_do fl_263_do fl_595_do fl_604_do fl_613_do ///
    fl_207_do fl_622_do fl_632_do fl_641_do fl_405_do fl_414_do fl_416_do ///
    fl_430_do fl_444_do fl_463_do fl_465_do fl_477_do fl_487_do fl_502_do ///
    fl_504_do fl_514_do fl_524_do fl_539_do fl_541_do fl_551_do fl_561_do


*** We only need one BRO variable ***
* The survey has several "paths" depending on the treatment(s), and each path stores the respondent's
* BRO choice in its own column. One column is called "bro" by default; the others carry generic names
* such as "v45" or "v57".
* Fill "bro" from whichever path-specific column is populated, then discard the path-specific columns.
local bro_alternates v45 v57 v59 v71 v73 v95 v97 v119 v121 v123 v125 v127 v129 v131 v133

foreach alt of local bro_alternates {
    * Only fill rows where "bro" is still blank, so an already-recorded answer is never overwritten.
    replace bro = `alt' if `alt' != "" & bro == ""
}

drop `bro_alternates'


*** Condense responses for Michael and Andrew Scenarios ***
* Naming of the intermediate variables:
*   b / w      = Black / White discriminatee
*   t_e / t_c  = employer / customer tastes
*   s_i / s_a  = inaccurate / accurate information (statistical scenarios)
*   suffix _a  = the name shown in the scenario was "Andrew" rather than "Michael"
* For each scenario, copy the "Andrew" answer into the main column whenever the main column is blank.
foreach scen in bt_e bt_c bs_i bs_a wt_e wt_c ws_i ws_a {
    replace `scen' = `scen'_a if `scen'_a != "" & `scen' == ""
}

* The "_a" columns are now redundant.
drop bt_e_a bt_c_a bs_i_a bs_a_a wt_e_a wt_c_a ws_i_a ws_a_a


*** Rename so the data can be put into long format ***
* After the reshape, each row holds one respondent's fairness assessment for a single scenario,
* i.e., the data become a respondent-by-scenario "panel".
* Scenario numbering: 1-4 = Black (employer taste, customer taste, inaccurate stat, accurate stat),
*                     5-8 = White (same order).
rename (bt_e  bt_c  bs_i  bs_a  wt_e  wt_c  ws_i  ws_a ) ///
       (fair1 fair2 fair3 fair4 fair5 fair6 fair7 fair8)

*** Reshape data to long format ***
* The scenario number (1-8) is stored in _j.
reshape long fair, i(randomid) j(_j)


*** Define dummies for Black treatment and types of discrimination ***
* _j is the scenario number (1-8) left behind by the reshape of fair1,...,fair8:
*   1-4 are Black-discriminatee scenarios, 5-8 are White-discriminatee scenarios.
gen black = inlist(_j, 1, 2, 3, 4)
gen white = !black

* Scenarios 3, 4, 7, 8 are statistical discrimination; the remaining ones are taste-based.
gen stat  = inlist(_j, 3, 4, 7, 8)
gen taste = !stat

* One dummy per scenario type (pooled over the race of the discriminatee).
gen employer_taste  = inlist(_j, 1, 5)
gen customer_taste  = inlist(_j, 2, 6)
gen inaccurate_stat = inlist(_j, 3, 7)
gen accurate_stat   = inlist(_j, 4, 8)

drop _j


*** Set up dummies for the justifiability of discriminatory action ***
* Low justifiability: employer tastes or inaccurate statistics.
* High justifiability: customer tastes or accurate statistics.
gen low_just  = employer_taste | inaccurate_stat
gen high_just = customer_taste | accurate_stat


*** Get rid of blank "fair" observations ***
* Rows for scenarios without a recorded assessment carry an empty string.
keep if fair != ""


*** Generate a single variable for the open-ended response ***
* The raw data has one open-response column per scenario and name, e.g., "bt_e_f_a" holds the open
* response to employer taste-based discrimination against a Black applicant when the name shown is
* "Andrew". For each respondent, at most one of these columns is populated, and it refers to the
* last scenario they encountered.
* open_response collects these into one column; a row is populated only when the response belongs
* to the scenario that row's fairness assessment is about.
gen open_response = ""

* andrew = 1 when any of the "Andrew" open-response columns is populated (missing otherwise).
gen andrew = .
foreach stub in bt_e bt_c bs_i bs_a wt_e wt_c ws_i ws_a {
    replace andrew = 1 if `stub'_f_a != ""
}

* open_response2 records which raw column the open response came from.
* This was useful in identifying which scenarios are in stage 1 or 2.
gen open_response2 = ""

* Parallel lists: the k-th stub corresponds to the k-th scenario dummy.
* The first four stubs are Black-discriminatee scenarios, the last four are White.
local stubs     bt_e bt_c bs_i bs_a wt_e wt_c ws_i ws_a
local scenarios employer_taste customer_taste inaccurate_stat accurate_stat employer_taste customer_taste inaccurate_stat accurate_stat

forvalues k = 1/8 {
    local stub     : word `k' of `stubs'
    local scenario : word `k' of `scenarios'
    local is_black = (`k' <= 4)

    * Rows whose fairness assessment is about this race/scenario combination.
    local rows black == `is_black' & `scenario' == 1

    * Take the default-name column first, then let the "Andrew" column take over for Andrew respondents.
    replace open_response = `stub'_f   if `rows'
    replace open_response = `stub'_f_a if `rows' & andrew == 1

    * Label the source column, but only where a response text actually exists.
    replace open_response2 = "`stub'_f"   if `rows' & open_response != ""
    replace open_response2 = "`stub'_f_a" if `rows' & andrew == 1 & open_response != ""
}

* The raw open-response columns and the helper flag are no longer needed.
drop bt_e_f bt_e_f_a bt_c_f bt_c_f_a bs_i_f bs_i_f_a bs_a_f bs_a_f_a ///
     wt_e_f wt_e_f_a wt_c_f wt_c_f_a ws_i_f ws_i_f_a ws_a_f ws_a_f_a ///
     andrew


*** Recode fairness assessment ***
* Numeric scale 1-7. The baseline value 1 is kept for every answer that does not match one of the
* labels listed below; the listed labels map to 2,...,7 in the order given.
* NOTE(review): any unexpected spelling/capitalisation of an answer would silently stay at 1 -- confirm
* against the raw labels.
gen fairness = 1

local score = 1
foreach answer in "unfair" "somewhat unfair" "neither fair nor unfair" "somewhat fair" "fair" "very fair" {
    local ++score
    replace fairness = `score' if fair == "`answer'"
}

* Replace the string variable with its numeric recode.
drop fair
rename fairness fair


*** Recode BRO assessment ***
* Same approach: baseline value 1, listed answers map to 2,...,7 in the order given.
* NOTE(review): a blank or unmatched "bro" answer also stays at 1 -- confirm this is intended.
gen bro2 = 1

local score = 1
foreach answer in ///
    "Black people have less opportunities than White people" ///
    "Black people have a little less opportunity than White people" ///
    "Black and White people have roughly equal opportunities" ///
    "Black people have a little more opportunity than White people" ///
    "Black people have more opportunities than White people" ///
    "Black people have much more opportunities than White people" {
    local ++score
    replace bro2 = `score' if bro == "`answer'"
}

drop bro
rename bro2 bro

* Create a race dummy: 1 for every respondent whose race is not recorded as "White".
gen non_white = (race != "White")

*** Create some dummies to tag suspicious observations ***
* These responses are highly suspected to be automated responses, judged from their open responses.
* The classification is read from a separate file and attached by randomid.
* NOTE(review): m:m merges pair rows by position within each key; if the using file has exactly one
* row per randomid this behaves like m:1 -- confirm.
merge m:m randomid using hh_suspicious_variables2

drop _merge

* Each flag is 1 when "suspicious" equals the given tag and 0 otherwise (including rows with no tag).
gen extra = (suspicious == "extra")

* Respondents seemingly copied text from the internet
gen copy_internet = (suspicious == "copy")

* Respondents seemingly copied text word-for-word from the survey
gen copy_survey = (suspicious == "copy from survey")

* Open response is completely irrelevant to the encountered scenario
gen unrelated = (suspicious == "unrelated")

* Some responses are very low-quality (and pasted words similar to that of our text) -- likely to be automated responses
gen low = (suspicious == "low")

* IP addresses of these are identical to another earlier response
gen same = (suspicious == "same")

* Geocoordinates suggest the respondent may not be in the U.S.
gen foreign = (suspicious == "foreign")

* Observations tagged "extra" are removed right away; the other flags are applied later.
drop if extra == 1

drop suspicious


* Create gender dummies (1 when the answer matches the label, 0 otherwise)
gen male           = (gender == "Male")
gen female         = (gender == "Female")
gen gender_decline = (gender == "Other/decline to state")


* Create race dummies (1 when the answer matches the label, 0 otherwise)
gen race_white      = (race == "White")
gen race_black      = (race == "Black or African American")
gen race_asian      = (race == "Asian")
gen race_hispanic   = (race == "Hispanic, Latino, or Spanish origin")
gen race_indigenous = (race == "American Indian or Alaska Native")
gen race_islander   = (race == "Native Hawaiian or Other Pacific Islander")
gen race_other      = (race == "Other")

* "other" = non-White respondents who are not Black or African American.
gen other = (race != "Black or African American" & non_white == 1)


* Create Age dummies (1 when the reported age bracket matches, 0 otherwise)
gen age18_24 = (age == "18-24")
gen age25_34 = (age == "25-34")
gen age35_44 = (age == "35-44")
gen age45_54 = (age == "45-54")
gen age55_64 = (age == "55-64")
gen age65_74 = (age == "65-74")
gen age75_84 = (age == "75-84")
gen age85 = (age == "85 and older")

* Coarser age groups.
gen age18_34 = (age18_24 == 1 | age25_34 == 1)
* "55 and over" must cover every bracket from 55 upward. The 75-84 and 85+ brackets were previously
* left out, which would have coded those respondents as 0 (i.e., as younger than 55).
gen age55_over = (age55_64 == 1 | age65_74 == 1 | age75_84 == 1 | age85 == 1)


* Create education dummies (1 when the answer matches the label, 0 otherwise)
generate education_primary      = education == "Primary school or below (grades 1-8)"
generate education_hs           = education == "High School (grades 9-12)"
generate education_some_college = education == "Some College (includes two-year college degrees)"
generate education_four_college = education == "Four-year College or University Degree"
generate education_higher       = education == "Higher Degree (e.g. MD, MBA, Master's, PhD)"

* High school or some college (including two-year degrees)
generate education_hs_2year = education_hs | education_some_college


* Political party preference dummies
generate party_democrat    = party == "Democrat"
generate party_republican  = party == "Republican"
generate party_independent = party == "Independent or no party affiliation"
generate party_other       = party == "Other"

* Independent and "Other" pooled together
generate party_indep_other = party_independent | party_other


* Spectrum dummies (one per point of the seven-point ideology question)
generate extremely_conservative = spectrum == "Extremely conservative"
generate conservative           = spectrum == "Conservative"
generate slightly_conservative  = spectrum == "Slightly Conservative"
generate moderate               = spectrum == "Moderate"
generate slightly_liberal       = spectrum == "Slightly liberal"
generate liberal                = spectrum == "Liberal"
generate extremely_liberal      = spectrum == "Extremely liberal"

* Three-way grouping: the two "slightly" categories are pooled with the moderates.
generate conserv = extremely_conservative | conservative
generate mod     = slightly_conservative | moderate | slightly_liberal
generate lib     = extremely_liberal | liberal


* Assign zeroes to some variables
* A system-missing value in these two indicators is treated as "no" (0).
foreach flag of varlist rejected prefer_no_answer {
    replace `flag' = 0 if `flag' == .
}


*** Merge state-level data ***
* The state information is based on geocoordinates (omitted since they may facilitate PII) and was obtained manually.
* NOTE(review): m:m merges pair rows by position within each key; if the using file has exactly one
* row per randomid this behaves like m:1 -- confirm.
merge m:m randomid using gg_state_data.dta

drop _merge


*** Identify scenarios belonging in Stage 1 and 2***
* Stage One scenarios were backed out manually in a separate file based off of the dataset so far
* These were identified using the fact that the race or justifiability treatment (or both) switches between stages
* And respondents only justify (in words) their fairness assessment for scenario #4.
* The main dataset is set aside with preserve while the stage file is prepared in memory; it is brought
* back by the restore further below.
preserve 

clear

insheet using dd_first_stage.csv

* stage_two is the complement of the stage_one indicator read from the CSV.
gen stage_two = (stage_one == 0)
* Within each randomid, stage-one rows (stage_two == 0) now come before stage-two rows.
sort randomid stage_two

*** Create more variables for race treatments ***
* E.g., "black1" takes on a value of "1" if a respondent encountered the Black treatment in Stage 1
* The stage-one rows get their value directly from "black"; the stage-two rows copy it from the nearest
* preceding stage-one row using row offsets:
*   - a stage-two row that directly follows a stage-one row and is followed by another stage-two row
*     looks one row back;
*   - a stage-two row that follows a non-stage-one row and is followed by a stage-one row (or the end of
*     the data) looks two rows back.
* NOTE(review): the [_n - 1]/[_n + 1] lookups are not restricted to the same randomid (no "by"), so they
* rely on the expected number of stage-one and stage-two rows per respondent. A stage-two row that
* matches neither pattern keeps the value 0 -- confirm against the layout of dd_first_stage.csv.
gen black1 = 0
replace black1 = 1 if stage_one[_n - 1] == 1 & stage_two[_n] == 1 & stage_two[_n+1] == 1 & black[_n - 1] == 1
replace black1 = 1 if stage_one[_n - 1] == 0 & stage_two[_n] == 1 & (stage_two[_n+1] == 0 | stage_two[_n+1] ==.) & black[_n - 2] == 1

replace black1 = 1 if black == 1 & stage_one == 1

* "black2" is the mirror image: stage-two rows take their value directly from "black", and stage-one
* rows copy it from the following stage-two rows (one row ahead for a stage-one row preceded by another
* stage-one row, two rows ahead for a stage-one row preceded by a stage-two row or the start of the data).
gen black2 = 0
replace black2 = 1 if stage_two[_n + 1] == 1 & stage_one[_n] == 1 & stage_one[_n-1] == 1 & black[_n+1] == 1
replace black2 = 1 if stage_two[_n + 1] == 0 & stage_one[_n] == 1 & (stage_one[_n-1] == 0 | stage_one[_n-1]==.) & black[_n + 2] == 1

replace black2 = 1 if black == 1 & stage_two == 1


* "white1"/"white2" repeat the same row patterns as black1/black2 but test for black == 0.
gen white1 = 0
replace white1 = 1 if stage_one[_n - 1] == 1 & stage_two[_n] == 1 & stage_two[_n+1] == 1 & black[_n - 1] == 0
replace white1 = 1 if stage_one[_n - 1] == 0 & stage_two[_n] == 1 & (stage_two[_n+1] == 0 | stage_two[_n+1] ==.) & black[_n - 2] == 0

replace white1 = 1 if black == 0 & stage_one == 1

gen white2 = 0
replace white2 = 1 if stage_two[_n + 1] == 1 & stage_one[_n] == 1 & stage_one[_n-1] == 1 & black[_n+1] == 0
replace white2 = 1 if stage_two[_n + 1] == 0 & stage_one[_n] == 1 & (stage_one[_n-1] == 0 | stage_one[_n-1]==.) & black[_n + 2] == 0

replace white2 = 1 if black == 0 & stage_two == 1


* Create variables for scenario types (taste vs.statistical)
* "taste1"/"taste2" use the same row patterns again, this time copying the "taste" indicator.
gen taste1 = 0
replace taste1 = 1 if stage_one[_n - 1] == 1 & stage_two[_n] == 1 & stage_two[_n+1] == 1 & taste[_n - 1] == 1
replace taste1 = 1 if stage_one[_n - 1] == 0 & stage_two[_n] == 1 & (stage_two[_n+1] == 0 | stage_two[_n+1] ==.) & taste[_n - 2] == 1

replace taste1 = 1 if taste == 1 & stage_one == 1

gen taste2 = 0
replace taste2 = 1 if stage_two[_n + 1] == 1 & stage_one[_n] == 1 & stage_one[_n-1] == 1 & taste[_n+1] == 1
replace taste2 = 1 if stage_two[_n + 1] == 0 & stage_one[_n] == 1 & (stage_one[_n-1] == 0 | stage_one[_n-1]==.) & taste[_n + 2] == 1

replace taste2 = 1 if taste == 1 & stage_two == 1

sort randomid stage_two

* Store the stage file in a temporary dataset (the macro name "stage_one" is unrelated to the variable
* of the same name), then bring the main dataset back into memory.
tempfile stage_one
save `stage_one'

restore 

* Merge variables on stages
* Rows are matched on the respondent id together with the treatment/scenario indicators.
* NOTE(review): m:m merges pair rows by position within each key; if the key combination is unique
* in both datasets this behaves like 1:1 -- confirm.
merge m:m randomid black stat taste employer_taste customer_taste inaccurate_stat accurate_stat using `stage_one'
drop _merge

sort randomid stage_two


*** Create variable, to be called "ordering," which takes on a value of 1-4 ***
* It indicates the first, second, third, and fourth scenario encountered.
* The "fl_###_do" variables from the raw data record the display order of the stage-one scenarios
* in the survey flow.

* Collect the display-order string from whichever flow column is populated. If more than one is
* populated, the column listed last wins.
gen stage1_scenarios = ""
foreach flow of varlist fl_178_do fl_217_do fl_235_do fl_201_do fl_408_do fl_459_do fl_498_do fl_535_do {
    replace stage1_scenarios = `flow' if `flow' != ""
}

* Stage-one ordering. The display-order string has the form "(RACE,first)|(RACE,second)", with "Andrew"
* appended to each element for respondents on the Andrew path. The scenario named first gets ordering 1
* and the scenario named second gets ordering 2.
* The four lists below are parallel: the p-th entries describe one possible (first, second) pair and
* the scenario dummies that identify the corresponding rows.
gen ordering = .

local first_types  inaccuratestatistical accuratestatistical employertaste customertaste
local second_types accuratestatistical inaccuratestatistical customertaste employertaste
local first_vars   inaccurate_stat accurate_stat employer_taste customer_taste
local second_vars  accurate_stat inaccurate_stat customer_taste employer_taste

foreach race in BLACK WHITE {
    * Stage-one race treatment that must accompany this display-order string.
    local race_is_black = ("`race'" == "BLACK")

    forvalues p = 1/4 {
        local type1 : word `p' of `first_types'
        local type2 : word `p' of `second_types'
        local var1  : word `p' of `first_vars'
        local var2  : word `p' of `second_vars'

        local michael_flow "(`race',`type1')|(`race',`type2')"
        local andrew_flow  "(`race',`type1')Andrew|(`race',`type2')Andrew"

        replace ordering = 1 if stage_one == 1 & (stage1_scenarios == "`michael_flow'" | stage1_scenarios == "`andrew_flow'") & black1 == `race_is_black' & `var1' == 1
        replace ordering = 2 if stage_one == 1 & (stage1_scenarios == "`michael_flow'" | stage1_scenarios == "`andrew_flow'") & black1 == `race_is_black' & `var2' == 1
    }
}

* Stage two: the scenario with an open response is the fourth one; any remaining stage-two assessment
* without an ordering is the third.
replace ordering = 4 if open_response != "" & fair !=.
replace ordering = 3 if ordering ==. & fair !=. & stage_two == 1


*** Create variables for type of scenario encountered first ***
* low_just1 / low_just3 flag a low-justifiability scenario shown first within stage one / stage two.
* The variable is always spelled out as "ordering" (not the abbreviation "order"), so the code does not
* depend on Stata's variable-abbreviation setting.
gen low_just1 = (low_just == 1 & ordering == 1)
gen low_just3 = (low_just == 1 & ordering == 3)

* Carry the flag over to the next scenario of the same stage. The lag is taken within respondent
* (by randomid), so the first row of one respondent never reads the last row of the previous one.
bysort randomid (ordering): replace low_just1 = 1 if ordering == 2 & low_just1[_n - 1] == 1
bysort randomid (ordering): replace low_just3 = 1 if ordering == 4 & low_just3[_n - 1] == 1

* Statistical-scenario indicators are the complements of the taste indicators.
gen stat1 = (taste1 == 0)
gen stat2 = (taste2 == 0)

* Respondents who saw the same race in both stages vs. those whose race treatment switched.
gen black_only = (black1 == 1 & black2 == 1)

gen white_only = (white1 == 1 & white2 == 1)

gen switcher = (black_only == 0 & white_only == 0)

* Give the generically named raw column v17 a proper name.
rename v17 ind


*** Transform fairness variables ***
* Shift both seven-point scales by 4, so the recoded values 1,...,7 become -3,...,3.
foreach scale of varlist fair bro {
    replace `scale' = `scale' - 4
}


*** Drop some observations (primarily suspected automated responses, as well as out-of-sample responses, e.g., a recorded test run) ***
foreach flag in low unrelated rejected copy_internet copy_survey same foreign {
    drop if `flag' == 1
}
* One specific response id is removed as well.
drop if randomid == 41588


*** Deal with creating weights ***
* Create categorical variables (useful for making weights).
* In every variable below, 0 means that none of the listed categories applies.

* Ideology: 1 = extremely conservative, ..., 7 = extremely liberal
gen views2 = cond(extremely_liberal == 1, 7, ///
             cond(liberal == 1, 6, ///
             cond(slightly_liberal == 1, 5, ///
             cond(moderate == 1, 4, ///
             cond(slightly_conservative == 1, 3, ///
             cond(conservative == 1, 2, extremely_conservative))))))

* Gender: 1 = male, 2 = female
gen gender2 = cond(female == 1, 2, male == 1)

* Education: 1 = high school or some college, 2 = four-year degree or higher
gen edu2 = cond(education_four_college == 1 | education_higher == 1, 2, ///
                education_hs == 1 | education_some_college == 1)

* Age: 1 = 18-24, 2 = 25-44, 3 = 45-74
gen age2 = cond(age45_54 == 1 | age55_64 == 1 | age65_74 == 1, 3, ///
           cond(age25_34 == 1 | age35_44 == 1, 2, age18_24 == 1))

* Race: 1 = White, 2 = not White
gen race2 = cond(race_white == 0, 2, race_white == 1)



* Export a data set used to create weights
* Only the id and the four demographic cells are written out; the full data set is restored afterwards.
preserve

keep randomid age2 edu2 gender2 race2

export delimited using "ii_mturk_data_for_weights", replace

restore

*---------------------------------- STOP HERE IF THE R CODE STILL HAS TO BE RUN ------------------------

*---------------------------------- RUN BELOW ONCE THE WEIGHTS ARE READY -------------------------------

* Add weights (these dta files correspond to the output of the acs_weights.R and gss_weights.R files)
* Only observations that find a match in a weights file are kept.
* NOTE(review): m:m merges pair rows by position within each key; if each weights file has exactly one
* row per cell this behaves like m:1 -- confirm.

* Weights defined on the age x education x gender x race cells
foreach weights_file in cc_acs_weights.dta ff_sample_weights.dta {
    merge m:m age2 edu2 gender2 race2 using `weights_file'

    keep if _merge == 3
    drop _merge
}

* Weights defined on the ideology categories
merge m:m views2 using ee_gss_weights

keep if _merge == 3
drop _merge


* Survey weight = population share of the cell relative to its share in the sample
gen survey_weight = pop_share/sample_share


* Remove helper variables that are not needed in the analysis data set
drop ///
    age2 edu2 gender2 race2 views2 ///
    fl_178_do fl_217_do fl_235_do fl_201_do fl_408_do fl_459_do fl_498_do fl_535_do fl_401_do ///
    low unrelated rejected copy_internet copy_survey same foreign extra ///
    employer_taste customer_taste inaccurate_stat accurate_stat ///
    open_response2 stage1_scenarios gender_decline ///
    county share_black share_asian share_hispanic share_white share_indigenous share_islander ///
    share_foreign mean_income share_trump unemployment_ share_urban_state share_urban_county ///
    sample_share pop_share


***************** SAVE DATA
* Return to the root folder and write the analysis data set into the main-analyses subfolder.
cd $homedir
save "2_Main Analyses/d1_when_is_discrimination_unfair_data_set.dta", replace
